#!/usr/bin/python
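# Prints the organism name for one genome, followed by one count per line:
# the number of times each ortholog ID from a reference list occurs in that
# genome's ortholog list.
# Usage: <script> <ortholog_id_list> <organism_table> <ACCESSION.orthologs.list>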

import os
import re
import sys
from collections import Counter

# Python 2 needs "rU" for universal-newline reading; Python 3 text mode does
# that by default and no longer accepts the "U" flag (removed in 3.11).
_READ_MODE = "rU" if sys.version_info[0] < 3 else "r"

# The accession is whatever precedes ".orthologs.list" in the genome file
# name. The dots are escaped so they only match literal dots.
_ACCESSION_RE = re.compile(r'(.*)\.orthologs\.list')

_USAGE = ("usage: %s <ortholog_id_list> <organism_table> "
          "<ACCESSION.orthologs.list>")


def read_ids(path):
    """Return every line of *path* with trailing whitespace removed.

    Blank lines are kept (as empty strings) so that the number of entries
    always equals the number of lines in the file.
    """
    with open(path, _READ_MODE) as handle:
        return [line.rstrip() for line in handle]


def read_organisms(path):
    """Parse a tab-separated organism table into ``{accession: name}``.

    The first column is the accession and the second the organism name; any
    further columns are ignored. When an accession is repeated, the last
    occurrence wins. Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line has no tab-separated second column.
    """
    organisms = {}
    with open(path, _READ_MODE) as handle:
        for line_no, line in enumerate(handle, 1):
            fields = line.split('\t')
            if len(fields) < 2:
                if not line.strip():
                    # Tolerate empty lines (e.g. a trailing newline).
                    continue
                raise ValueError(
                    "%s:%d: expected '<accession>\\t<name>', got %r"
                    % (path, line_no, line))
            organisms[fields[0].rstrip()] = fields[1].rstrip()
    return organisms


def accession_candidates(genome_orth_file):
    """Return the accession keys to try for *genome_orth_file*.

    The result is a ``(full, base)`` tuple: ``full`` is everything before
    ``.orthologs.list`` exactly as given on the command line, and ``base`` is
    the same with any leading directory components removed.

    Raises:
        ValueError: if the name does not contain ``.orthologs.list``.
    """
    match = _ACCESSION_RE.match(genome_orth_file)
    if match is None:
        raise ValueError(
            "genome ortholog file name %r does not look like "
            "'<accession>.orthologs.list'" % genome_orth_file)
    full = match.group(1)
    return full, os.path.basename(full)


def main(argv=None):
    """Print the organism name, then one occurrence count per ortholog ID.

    *argv* defaults to ``sys.argv``; positions 1-3 are the ortholog ID list,
    the organism table and the genome's ``<accession>.orthologs.list`` file.
    Any extra arguments are ignored. An unknown accession yields an empty
    organism-name line.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.exit(_USAGE % (argv[0] if argv else "script"))
    ortho_list_file, org_list_file, genome_orth_file = argv[1:4]

    orth_ids = read_ids(ortho_list_file)
    orgs = read_organisms(org_list_file)
    # Tally once instead of rescanning the genome's list for every ID.
    found_counts = Counter(read_ids(genome_orth_file))

    # Try the accession exactly as given first, then without its directory
    # part, so that passing the genome file with a path still finds the name.
    full_acc, base_acc = accession_candidates(genome_orth_file)
    org_name = orgs.get(full_acc, orgs.get(base_acc, ""))

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(org_name)
    for orth_id in orth_ids:
        print(found_counts[orth_id])


if __name__ == "__main__":
    main()